## ---- Setpup ----
## Folders
dir.create("est/correlates_of_rent")
dir.create("est/correlates_of_rent/household")
dir.create("est/correlates_of_rent/neighborhood")

## lmer estimation algorithm control
strict_tol <- lmerControl(optCtrl = list(xtol_abs = 1e-8, ftol_abs = 1e-8))

## ---- Data ----
## SOEP
load("dat/proc-data/soep_imp.RData")

for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    dplyr::group_by(hh_id, year) %>%
    dplyr::mutate(weight = mean(weight)) %>%
    dplyr::slice(1) %>%
    dplyr::ungroup() %>%
    dplyr::select(
      hh_id,
      year,
      year_fac,
      plz,
      kkz,
      hh_comp,
      mover,
      yrs_since_movein,
      owner,
      cold_rent_sqm,
      cold_rent_load,
      log_hinc_eq,
      home_size,
      hh_mmb,
      rent,
      cmr_arm,
      rent_pchg_2005,
      cmr_arm_pchg_2005,
      imputed_rent,
      asset_re_hom_p,
      asset_re_hom_m,
      asset_re_hom_t,
      asset_ov_ttl_t,
      weight
    )
}
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    group_by(hh_id) %>%
    mutate_at(
      .vars = vars(rent, cmr_arm, rent_pchg_2005, cmr_arm_pchg_2005),
      .funs = list(
        umn = ~ mean(., na.rm = T),
        cwu = ~ . - mean(., na.rm = T)
      )
    ) %>%
    ungroup()
}

# Microm (neighborhood-level)
load("dat/proc-data/plz5_f_und_b.RData")
microm_nbh <- read.dta("dat/proc-data/microm_2005_2018_nbh.dta") %>%
  mutate(plz = as.character(plz)) %>%
  mutate(plz = ifelse(nchar(plz) == 4, paste0("0", plz), plz)) %>%
  left_join(
    plz5_f_und_b %>%
      as.data.frame() %>%
      dplyr::select(-AGS05max, -geometry) %>%
      group_by(plz) %>%
      mutate_at(
        .vars = vars(rent, cmr_arm),
        .funs = list(`2005` = ~ ifelse(length(.[year == 2005]) == 0L, NA, .[year == 2005]))
      ) %>%
      mutate(
        rent_pchg_2005 = rent / rent_2005 - 1,
        rent_achg_2005 = rent - rent_2005,
        cmr_arm_pchg_2005 = cmr_arm / cmr_arm_2005 - 1,
        cmr_arm_achg_2005 = cmr_arm - cmr_arm_2005
      ) %>%
      ungroup(),
    by = c("plz", "year")
  ) %>%
  mutate(
    nbh_mmo_k_fluktu = as.numeric(droplevels(nbh_mmo_k_fluktu)) - 6.0,
    nbh_mmo_k_volumen = as.numeric(droplevels(nbh_mmo_k_volumen)) - 5.0,
    nbh_mmo_k_saldo = as.numeric(droplevels(nbh_mmo_k_saldo)) - 4.0
  ) %>%
  mutate_if(is.factor, as.character) %>%
  mutate(
    gtyp5 = case_when(
      gtyp %in% c("[1] Kernstaedte>==gr.Verdraum") ~ "urban > 500k",
      gtyp %in% c(
        "[2] Kernstaedte<==gr.Verdraum",
        "[9] Kernstaedte<==Verdansatz"
      ) ~ "urban < 500k",
      gtyp %in% c(
        "[3] Mi-zent.=hochverd.=gr.Verdraum",
        "[4] so.Gem.=hochverd.=gr.Verdraum",
        "[5] Mi-zent.=verd.=gr.Verdraum",
        "[6] so.Gem.=verd.=gr.Verdraum",
        "[10] Mi-zent.=verd.=Verdansatz",
        "[11] so.Gem.=verd.=Verdansatz"
      ) ~ "suburban",
      gtyp %in% c(
        "[7] Mi-zent.=laendl=gr.Verdraum",
        "[8] so.Gem.=laendl=gr.Verdraum",
        "[12] Mi-zent.=laendl=Verdansatz",
        "[13] so.Gem.=laendl=Verdansatz"
      ) ~ "suburban-rural",
      gtyp %in% c(
        "[14] Mi-zent.=verd.=laendl.",
        "[15] so.Gem.=verd.=laendl.",
        "[16] Mi-zent.=laendl=laendl.",
        "[17] so.Gem.=laendl=laendl."
      ) ~ "remote-rural",
      TRUE ~ NA_character_
    )
  ) %>%
  group_by(plz) %>%
  mutate_at(
    .vars = vars(rent, cmr_arm, rent_pchg_2005, cmr_arm_pchg_2005),
    .funs = list(
      umn = ~ mean(., na.rm = T),
      cwu = ~ . - mean(., na.rm = T)
    )
  ) %>%
  ungroup()
microm_nbh <- microm_nbh %>%
  mutate(year_fac = as.factor(year))



## ---- Household-level Models ----
## Outcomes
outcomes <- c("cold_rent_sqm", "asset_re_hom_t", "asset_ov_ttl_t")

outcome_requires_subset <- c(
  "cold_rent_sqm" = "owner == 0",
  "asset_re_hom_t" = "owner == 1 & year %in% c(2007, 2012, 2017)",
  "asset_ov_ttl_t" = NA_character_
)

## Main predictors
predictors <- c("rent")

## Year fixed effects?
year_fixed_effects <- c("year_fac +")

## Subsets
subset_owner <- c("owner == 0", "owner == 1")

## Model formula components
predictor_formula <-
  "[_pred_]_cwu + [_pred_]_umn +"


## ---- Compile model code ----
model_formula <-
  'mod <- lmer(
  [_yvar_] ~
    [_xvars_]
    (1 | hh_id) + (1 | plz) + (1 | kkz),
  data = [_data_],
  control = strict_tol,
  weights = weight
)'

model_formula_list <- list()
counter <- 0L
for (outcome in outcomes) {
  for (predictor in predictors) {
    for (year_fe in year_fixed_effects) {
      ## Covariates
      predictor_formula_local <-
        gsub("\\[\\_pred\\_\\]", predictor, predictor_formula)
      
      xvars <- predictor_formula_local
      
      if (magrittr::not(is.na(year_fe))) {
        xvars <- paste(xvars, year_fe, sep = " \n ")
      }
      
      
      if (magrittr::not(is.na(outcome_requires_subset[outcome]))) {
        ## Counter
        counter <- counter + 1L
        
        ## Data
        data <- "soep_imp$imputations[[m]]"
        data <- paste(data,
                      paste0("filter(", outcome_requires_subset[outcome], ")"),
                      sep = " %>% \n ")
        
        ## Combine
        model_formula_local <-
          gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
        model_formula_local <-
          gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
        model_formula_local <-
          gsub("\\[\\_data\\_\\]", data, model_formula_local)
        
        ## Return
        model_formula_list[[counter]] <-
          list(
            level = "household",
            outcome = outcome,
            predictor = predictor,
            owner = outcome_requires_subset[outcome],
            year_fe = magrittr::not(is.na(year_fe)),
            formula = model_formula_local
          )
      } else {
        for (owner in subset_owner) {
          ## Counter
          counter <- counter + 1L
          
          ## Data
          data <- "soep_imp$imputations[[m]]"
          data <- paste(data, paste0("filter(", owner, ")"), sep = " %>% \n ")
          
          if (outcome == "asset_ov_ttl_t") {
            data <- paste(data, "filter(year %in% c(2007, 2012, 2017))", sep = " %>% \n ")
          }
          
          ## Combine
          model_formula_local <-
            gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
          model_formula_local <-
            gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
          model_formula_local <-
            gsub("\\[\\_data\\_\\]", data, model_formula_local)
          
          ## Return
          model_formula_list[[counter]] <-
            list(
              level = "household",
              outcome = outcome,
              predictor = predictor,
              owner = owner,
              year_fe = magrittr::not(is.na(year_fe)),
              formula = model_formula_local
            )
        }
      }
    }
  }
}


## ---- Estimation ----
## Capture start time
ptm <- proc.time()[3]

for (analysis in seq_along(model_formula_list)) {
  ## Estimate model in parallel
  current_formula <- model_formula_list[[analysis]]$formula
  
  ## Set up cluster for parallelized computation across imputations
  cl <- makeCluster(length(soep_imp$imputations),
                    outfile = "est/correlates_of_rent_hh.txt",
                    type = "FORK")
  est <- parLapply(cl, seq_along(soep_imp$imputations), function (m) {
    eval(parse(text = current_formula))
  })
  
  ## Exit parallel computation
  stopCluster(cl)
  
  ## Export output
  # Model matrix (stacked)
  X <- do.call(rbind, lapply(est, function(obj)
    obj@pp$X))
  mod_summary <- lapply(est, summary)
  
  # Estimates
  data.frame(
    model_id = analysis,
    outcome = model_formula_list[[analysis]]$outcome,
    predictor = model_formula_list[[analysis]]$predictor,
    owner = model_formula_list[[analysis]]$owner,
    year_fe = model_formula_list[[analysis]]$year_fe,
    N_obs = nrow(est[[1]]@pp$X),
    N_hh  = nlevels(est[[1]]@flist$hh_id),
    N_plz = nlevels(est[[1]]@flist$plz),
    N_kkz = nlevels(est[[1]]@flist$kkz),
    convergence_imp1 = est[[1]]@optinfo$conv$opt,
    convergence_imp2 = est[[2]]@optinfo$conv$opt,
    convergence_imp3 = est[[3]]@optinfo$conv$opt,
    convergence_imp4 = est[[4]]@optinfo$conv$opt,
    convergence_imp5 = est[[5]]@optinfo$conv$opt,
    sigma_res_imp1 = mod_summary[[1]]$sigma,
    sigma_id_imp1  = attr(mod_summary[[1]]$varcor$hh_id, "stddev"),
    sigma_plz_imp1 = attr(mod_summary[[1]]$varcor$plz, "stddev"),
    sigma_kkz_imp1 = attr(mod_summary[[1]]$varcor$kkz, "stddev"),
    sigma_res_imp2 = mod_summary[[2]]$sigma,
    sigma_id_imp2  = attr(mod_summary[[2]]$varcor$hh_id, "stddev"),
    sigma_plz_imp2 = attr(mod_summary[[2]]$varcor$plz, "stddev"),
    sigma_kkz_imp2 = attr(mod_summary[[2]]$varcor$kkz, "stddev"),
    sigma_res_imp3 = mod_summary[[3]]$sigma,
    sigma_id_imp3  = attr(mod_summary[[3]]$varcor$hh_id, "stddev"),
    sigma_plz_imp3 = attr(mod_summary[[3]]$varcor$plz, "stddev"),
    sigma_kkz_imp3 = attr(mod_summary[[3]]$varcor$kkz, "stddev"),
    sigma_res_imp4 = mod_summary[[4]]$sigma,
    sigma_id_imp4  = attr(mod_summary[[4]]$varcor$hh_id, "stddev"),
    sigma_plz_imp4 = attr(mod_summary[[4]]$varcor$plz, "stddev"),
    sigma_kkz_imp4 = attr(mod_summary[[4]]$varcor$kkz, "stddev"),
    sigma_res_imp5 = mod_summary[[5]]$sigma,
    sigma_id_imp5  = attr(mod_summary[[5]]$varcor$hh_id, "stddev"),
    sigma_plz_imp5 = attr(mod_summary[[5]]$varcor$plz, "stddev"),
    sigma_kkz_imp5 = attr(mod_summary[[5]]$varcor$kkz, "stddev")
  ) %>%
    write.csv(paste0(
      "est/correlates_of_rent/household/",
      "model_info",
      analysis,
      ".csv"
    ))
  
  model_coefficients <- cbind(
    data.frame(
      model_id = analysis,
      x_names = names(fixef(est[[1]])),
      x_means = apply(X, 2, mean),
      x_min = apply(X, 2, min),
      x_max = apply(X, 2, max),
      b_imp1 = fixef(est[[1]]),
      b_imp2 = fixef(est[[2]]),
      b_imp3 = fixef(est[[3]]),
      b_imp4 = fixef(est[[4]]),
      b_imp5 = fixef(est[[5]])
    ),
    as.data.frame(as.matrix(vcov(est[[1]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp1")),
    as.data.frame(as.matrix(vcov(est[[2]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp2")),
    as.data.frame(as.matrix(vcov(est[[3]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp3")),
    as.data.frame(as.matrix(vcov(est[[4]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp4")),
    as.data.frame(as.matrix(vcov(est[[5]]))) %>%
      rename_all(function (x)
        paste0(x, "_imp5"))
  ) %>%
    write.csv(
      paste0(
        "est/correlates_of_rent/household/",
        "model_coefficients",
        analysis,
        ".csv"
      )
    )
  
  ## Clear memory
  rm(est, X, mod_summary)
  gc(reset = TRUE)
  
  ## Progress
  print(paste0(
    "Analysis: ",
    analysis,
    ". Time elapsed: ",
    round((proc.time()[3] - ptm) / 60, 2),
    " minutes."
  ))
}


## ---- Collect output ----
model_files <- list.files("est/correlates_of_rent/household")
model_info_files <- model_files[startsWith(model_files, "model_info")]
model_coefficients_files <-
  model_files[startsWith(model_files, "model_coefficients")]

## Model info
model_info <- data.frame()
for (file in model_info_files) {
  model_info <- model_info %>%
    bind_rows(read.csv(paste0(
      "est/correlates_of_rent/household/", file
    )))
}

model_info <- model_info %>%
  dplyr::select(-X) %>%
  arrange(model_id)

write.csv(model_info,
          file = "est/info_correlates_of_rent_hh.csv",
          row.names = FALSE)

## Model coefficients
model_coefficients <- data.frame()
for (file in model_coefficients_files) {
  model_coefficients <- model_coefficients %>%
    bind_rows(read.csv(paste0(
      "est/correlates_of_rent/household/", file
    )))
}

model_coefficients <- model_coefficients %>%
  dplyr::select(-X) %>%
  arrange(model_id)

write.csv(model_coefficients,
          file = "est/coef_correlates_of_rent_hh.csv",
          row.names = FALSE)

save(model_info, model_coefficients, file = "est/correlates_of_rent_hh.rdata")


## ---- Neighborhood-level Models ----
## Outcomes
outcomes <- c(
  "nbh_mlp_k_status",
  "nbh_kkr_w_proeinw"
)

## Main predictors
predictors <- c("rent")

## Year fixed effects?
year_fixed_effects <- c("year_fac +")

## Model formula components
predictor_formula <-
  "[_pred_]_cwu + [_pred_]_umn +"

## ---- Compile model code ----
model_formula <-
  'mod <- lmer(
  [_yvar_] ~
    [_xvars_]
    (1 | plz) + (1 | kkz),
  data = [_data_],
  control = strict_tol
)'

model_formula_list <- list()
counter <- 0L
for (outcome in outcomes) {
  for (predictor in predictors) {
    for (year_fe in year_fixed_effects) {
      ## Covariates
      predictor_formula_local <-
        gsub("\\[\\_pred\\_\\]", predictor, predictor_formula)
      
      xvars <- predictor_formula_local
      
      if (magrittr::not(is.na(year_fe))) {
        xvars <- paste(xvars, year_fe, sep = " \n ")
      }
      
      ## Counter
      counter <- counter + 1L
      
      ## Data
      data <- "microm_nbh"
      
      ## Combine
      model_formula_local <-
        gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
      model_formula_local <-
        gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
      model_formula_local <-
        gsub("\\[\\_data\\_\\]", data, model_formula_local)
      
      ## Return
      model_formula_list[[counter]] <-
        list(
          level = "neighborhood",
          outcome = outcome,
          predictor = predictor,
          year_fe = magrittr::not(is.na(year_fe)),
          formula = model_formula_local
        )
    }
  }
}


## ---- Estimation ----
## Capture start time
ptm <- proc.time()[3]

for (analysis in seq_along(model_formula_list)) {
  ## Estimate model
  eval(parse(text = model_formula_list[[analysis]]$formula))
  
  ## Export output
  # Model matrix (stacked)
  X <- mod@pp$X
  mod_summary <- summary(mod)
  
  # Estimates
  data.frame(
    model_id = analysis,
    outcome = model_formula_list[[analysis]]$outcome,
    predictor = model_formula_list[[analysis]]$predictor,
    year_fe = model_formula_list[[analysis]]$year_fe,
    N_pl8 = nrow(X),
    N_plz = nlevels(mod@flist$plz),
    N_kkz = nlevels(mod@flist$kkz),
    convergence = mod@optinfo$conv$opt,
    sigma_res = mod_summary$sigma,
    sigma_plz = attr(mod_summary$varcor$plz, "stddev"),
    sigma_kkz = attr(mod_summary$varcor$kkz, "stddev")
  ) %>%
    write.csv(paste0(
      "est/correlates_of_rent/neighborhood/",
      "model_info",
      analysis,
      ".csv"
    ))
  
  model_coefficients <- cbind(
    data.frame(
      model_id = analysis,
      x_names = names(fixef(mod)),
      x_means = apply(X, 2, mean),
      x_min = apply(X, 2, min),
      x_max = apply(X, 2, max),
      b = fixef(mod)
    ),
    as.data.frame(as.matrix(vcov(mod)))
  ) %>%
    write.csv(
      paste0(
        "est/correlates_of_rent/neighborhood/",
        "model_coefficients",
        analysis,
        ".csv"
      )
    )
  
  ## Clear memory
  rm(mod, X, mod_summary)
  gc(reset = TRUE)
  
  ## Progress
  print(paste0(
    "Analysis: ",
    analysis,
    ". Time elapsed: ",
    round((proc.time()[3] - ptm) / 60, 2),
    " minutes."
  ))
}


## ---- Collect output ----
model_files <- list.files("est/correlates_of_rent/neighborhood")
model_info_files <- model_files[startsWith(model_files, "model_info")]
model_coefficients_files <-
  model_files[startsWith(model_files, "model_coefficients")]

## Model info
model_info <- data.frame()
for (file in model_info_files) {
  model_info <- model_info %>%
    bind_rows(read.csv(paste0(
      "est/correlates_of_rent/neighborhood/", file
    )))
}

model_info <- model_info %>%
  dplyr::select(-X) %>%
  arrange(model_id)

write.csv(model_info,
          file = "est/info_correlates_of_rent_nbh.csv",
          row.names = FALSE)

## Model coefficients
model_coefficients <- data.frame()
for (file in model_coefficients_files) {
  model_coefficients <- model_coefficients %>%
    bind_rows(read.csv(paste0(
      "est/correlates_of_rent/neighborhood/", file
    )))
}

model_coefficients <- model_coefficients %>%
  dplyr::select(-X) %>%
  arrange(model_id)

write.csv(model_coefficients,
          file = "est/coef_correlates_of_rent_nbh.csv",
          row.names = FALSE)

save(model_info, model_coefficients, file = "est/correlates_of_rent_nbh.rdata")
